Created by A.J. Brown\ 27 Jan 2022\ Using Python 3.9.6\ Jupyter Lab 3.2.1\ Version 0.7
The following script is a tool to look at water quality data using the excel template that Emmanuel Deleon has developed for all water quality data. These graphs show data from ARDEC 2200 in 2019. Each script created would contain data from a new field. This script could be used by 1) the AWQP staff to look at preliminary data insights, and 2) stakeholders after QA/QC to allow them to look at publishable insights.
# To convert to html and hide code, run the following console command in the same directory as the ipynb:
# jupyter nbconvert YourNotebook.ipynb --no-input --to html
# DO NOT use the export in Jupyter Lab
# TODO: make scatterplot colorize by sample type
# TODO: add ols regression lines to scatterplot
# Import Python Modules
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import tabulate
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from tkinter import filedialog
from scipy import stats
# Import csv data (e.g., Berthoud_filter.csv)
df = pd.read_csv(r"C:\Users\ansleybr\OneDrive - Colostate\Documents\GitHub\AWQP-Water-Analysis-Report\Example Data\ARDEC2200_filter.csv", na_values=['na','NA','nd','ND','Nd','nD','U','u','data missing','Data missing','no data','No data'])
# df = pd.read_csv(filedialog.askopenfilename(), na_values=['nd','ND','Nd','nD','U','u','data missing','Data missing','no data','No data'])
# Data Cleaning
## Make Station ID Uppercase
df['Station']=df['Station'].str.upper()
## Assign 'Date' column name from csv
date_name = 'Date' #or 'Start Date'
## Extract 'Year' into its own column
df['Year'] = pd.DatetimeIndex(df[date_name]).year
## clean whitespace from column names
df.columns = df.columns.str.replace(' ', '')
dfx = df.copy()
## Rename Columns for 'real' values
water_dict = {
'Location': 'Location',
'Date': 'Date',
'Irr/Storm': 'Irr./Storm Event',
'Station': 'Sample Type',
'Rep': 'Replication ID',
'ID': 'Sample ID',
'op': 'Orthophosphate (mg/L)',
'no3': 'NO3 (mg/L)',
'nh4': 'NH4 (mg/L)',
'tkn': 'TKN (mg/L)',
'tp': 'Total Phosphorus (mg/L)',
'selenium': 'Selenium (mg/L)',
'ECOLI': 'E. coli. (Ct./100 mL)',
'fecal': 'Fecal Coliform (Ct./100 mL)', #confirm w/ manny on coliform
'tss': 'TSS (g/L)',
'Flow': 'Flow (GPM)', #confirm w/ manny on units
'Notes': 'Notes',
'Year': 'Year'
}
df.rename(
columns=water_dict,
inplace = True
)
## List of columns for reference
#df.columns
All samples are identified using abbreviations corresponding to locations as noted in the legend below:
legend = [["A2",'ARDEC 2200 Field'],
]
legendTable = tabulate.tabulate(legend, tablefmt='html', headers=['Sample ID Label','Location'])
legendTable
| Sample ID Label | Location |
|---|---|
| A2 | ARDEC 2200 Field |
Use the below boxplot to graph various water analytes stratified by sample type (i.e., Inflow, Outflow, etc.). Analytes can be selected by using the dropdown menu on the top left of the graph. Hover over points with your mouse for additional information.
# Helpful link: https://stackoverflow.com/questions/69354451/how-to-add-a-button-to-a-plotly-express-graph-to-update-a-specific-value
cols = df.columns.values.tolist()
fig = go.Figure()
for col in cols:
figpx = px.violin(df.assign(Plot=col), #change this to px.box for only boxplot
#x='Station',
x='Sample Type',
y=col,
#color='Station',
color='Sample Type',
points='all',
box=True, # comment this line out for only boxplot
hover_name='Sample ID',
hover_data=['Plot']).update_traces(visible=False)
fig.add_traces(figpx.data)
fig.update_layout(
updatemenus=[
{
"buttons":
[
{
"label": k,
"method": "update",
"args":
[
{"visible": [t.customdata[0][0]==k for t in fig.data]},
],
}
for k in cols
]
}
],
height=700
).update_traces(visible=True, selector=lambda t: t.customdata[0][0]==cols[0] )
fig.show()
Use the below Scatter plot to graph various water analytes against each other to see potential relationships. Analytes can be selected by using the dropdown menus near top left of the graph. Hover over points with your mouse for additional information.
# Helpful link: https://stackoverflow.com/questions/69242033/build-a-plotly-scatterplot-with-two-drop-down-buttons-one-for-x-and-one-for-y-ax
# My post: https://stackoverflow.com/questions/71028751/plotly-scatter-plot-with-dropdown-menu-and-color-by-group/71069032#71069032
cols = df.columns.values.tolist()
# make list of default plotly colors in hex
plotly_colors=[
'#1f77b4', # muted blue
'#ff7f0e', # safety orange
'#2ca02c', # cooked asparagus green
'#d62728', # brick red
'#9467bd', # muted purple
'#8c564b', # chestnut brown
'#e377c2', # raspberry yogurt pink
'#7f7f7f', # middle gray
'#bcbd22', # curry yellow-green
'#17becf' # blue-teal
]
# create dictionary to associate colors with unique categories
color_dict = dict(zip(df['Sample Type'].unique(),plotly_colors))
# map new column with hex colors to pass to go.Scatter()
df['hex']= df['Sample Type'].map(color_dict)
# initialize scatter plot
fig = go.Figure(
go.Scatter(
x=df['Location'],
y=df['Location'],
text=df['Sample Type'],
marker=dict(color=df['hex'], showscale=True),
mode="markers"
)
)
# initialize dropdown menus
fig.update_layout(
updatemenus=[
{
"buttons": [
{
"label": f"x - {x}",
"method": "update",
"args": [
{"x": [df[x]]},
{"xaxis": {"title": x}},
],
}
for x in cols
]
},
{
"buttons": [
{
"label": f"y - {x}",
"method": "update",
"args": [
{"y": [df[x]]},
{"yaxis": {"title": x}}
],
}
for x in cols
],
"y": 0.9,
},
],
margin={"l": 0, "r": 0, "t": 25, "b": 0},
height=700
)
fig.show()
Use the below scatter matrix to look at potential relationships between numerical variables.
Points are stratified by sample type (i.e., Inflow, Outflow, etc.).
Hover over points with your mouse for additional information.
dimensions=['op', 'no3', 'nh4', 'tkn', 'tp', 'selenium', 'ECOLI', 'fecal',
'tss', 'Flow']
#new_dimensions = [water_dict[i] for i in dimensions]
#new_dimensions
dfx
fig = px.scatter_matrix(dfx, dimensions, color='Station', height=1250)
fig.show()
df
| Location | Date | Irr./Storm Event | Sample Type | Replication ID | Sample ID | Orthophosphate (mg/L) | NO3 (mg/L) | NH4 (mg/L) | TKN (mg/L) | Total Phosphorus (mg/L) | Selenium (mg/L) | E. coli. (Ct./100 mL) | Fecal Coliform (Ct./100 mL) | TSS (g/L) | Flow (GPM) | Notes | Year | hex | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ARDEC 2200 | 6/23/2019 | S1 | OUTFLOW | 1 | A2-S1-OT | 0 | 5.0 | 0 | 3.40 | 0.480 | 0.0022 | 0.0 | 0.0 | 1.40 | NaN | NaN | 2019 | #1f77b4 |
| 1 | ARDEC 2200 | 6/23/2019 | S1 | OUTFLOW | 2 | A2-S1-OT-D | 0 | 4.9 | 0 | 2.60 | 0.460 | 0.0027 | 5000.0 | 5000.0 | 0.92 | NaN | NaN | 2019 | #1f77b4 |
| 2 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 1 | A2-S2-OT-C1 | 0 | 2.8 | 0 | 40.00 | 1.500 | 0.0160 | 800.0 | 1000.0 | 15.82 | 2.212752e+04 | Part 1 | 2019 | #1f77b4 |
| 3 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 2 | A2-S2-OT-C1-D | 0 | 2.8 | 0 | 28.00 | 2.800 | 0.0110 | 600.0 | 600.0 | 16.26 | 2.212752e+04 | Part 1 Dup | 2019 | #1f77b4 |
| 4 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 1 | A2-S2-OT-C2 | 0 | 2.5 | 0 | 20.00 | 1.900 | 0.0120 | 400.0 | 400.0 | 9.11 | 1.907793e+04 | Part 2 | 2019 | #1f77b4 |
| 5 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 2 | A2-S2-OT-C2-D | 0 | 2.5 | 0 | 21.00 | 2.200 | 0.0100 | 900.0 | 1000.0 | 8.94 | 1.907793e+04 | Part 2 Dup | 2019 | #1f77b4 |
| 6 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 1 | A2-S2-OT-C3 | 0 | 2.2 | 0 | 18.00 | 2.100 | 0.0073 | 900.0 | 1400.0 | 6.00 | 9.303694e+03 | Part 3 | 2019 | #1f77b4 |
| 7 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 2 | A2-S2-OT-C3-D | 0 | 2.2 | 0 | 18.00 | 1.300 | 0.0075 | 1200.0 | 1500.0 | 6.09 | 9.303694e+03 | Part 3 Dup | 2019 | #1f77b4 |
| 8 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 1 | A2-S2-OT-C4 | 0 | 2.4 | 0 | 13.00 | 1.600 | 0.0077 | 1400.0 | 1500.0 | 5.22 | 1.207985e+04 | Part 4 | 2019 | #1f77b4 |
| 9 | ARDEC 2200 | 7/5/2019 | S2 | OUTFLOW | 2 | A2-S2-OT-C4-D | 0 | 2.3 | 0 | 10.00 | 1.200 | 0.0069 | 1400.0 | 1600.0 | 4.92 | 1.207985e+04 | Part 4 Dup | 2019 | #1f77b4 |
| 10 | ARDEC 2200 | 7/12/2019 | 1 | INFLOW | 1 | A2-01-IN | 0 | 10.0 | 0 | 4.20 | 0.440 | 0.0140 | 100.0 | 200.0 | 2.62 | NaN | Inflow- Grab | 2019 | #ff7f0e |
| 11 | ARDEC 2200 | 7/12/2019 | 1 | INFLOW | 2 | A2-01-IN-D | 0 | 10.0 | 0 | 4.80 | 0.048 | 0.0140 | NaN | NaN | 2.79 | NaN | Inflow- Grab - Dup | 2019 | #ff7f0e |
| 12 | ARDEC 2200 | 7/12/2019 | 1 | OUTFLOW | 1 | A2-01-OT | 0 | 9.9 | 0 | 1.20 | 0.460 | 0.0100 | 500.0 | 700.0 | 0.23 | 2.610865e+05 | NaN | 2019 | #1f77b4 |
| 13 | ARDEC 2200 | 7/12/2019 | 1 | OUTFLOW | 2 | A2-01-OT-D | 0 | 10.0 | 0 | 0.97 | 0.320 | 0.0120 | 200.0 | 200.0 | 0.19 | 2.610865e+05 | NaN | 2019 | #1f77b4 |
| 14 | ARDEC 2200 | 7/23/2019 | 2 | OUTFLOW | 1 | A2-O2-OT | 0 | 9.1 | 0 | 1.00 | 0.120 | 0.0098 | NaN | NaN | 0.12 | 1.889554e+05 | NaN | 2019 | #1f77b4 |
| 15 | ARDEC 2200 | 7/23/2019 | 2 | OUTFLOW | 2 | A2-02-OT-D | 0 | 9.1 | 0 | 1.30 | 0.130 | 0.0095 | NaN | NaN | 0.07 | 1.889554e+05 | NaN | 2019 | #1f77b4 |
| 16 | ARDEC 2200 | 8/3/2019 | 3 | OUTFLOW | 1 | A2-03-OT | 0 | 9.1 | 0 | 1.20 | 0.170 | 0.0092 | NaN | NaN | 0.10 | 5.394982e+04 | NaN | 2019 | #1f77b4 |
| 17 | ARDEC 2200 | 8/3/2019 | 3 | OUTFLOW | 2 | A2-03-OT-D | 0 | 9.1 | 0 | 1.50 | 0.170 | 0.0100 | NaN | NaN | 0.12 | 5.394982e+04 | NaN | 2019 | #1f77b4 |
| 18 | ARDEC 2200 | 8/3/2019 | 3 | GATED PIPE | 1 | A2-03-WL | 0 | 9.3 | 0 | 0.00 | 0.000 | 0.0093 | NaN | NaN | 0.02 | NaN | NaN | 2019 | #2ca02c |
| 19 | ARDEC 2200 | 8/3/2019 | 3 | INFLOW | 1 | A2-03-IN | 0 | 9.2 | 0 | 3.00 | 0.200 | 0.0110 | NaN | NaN | 1.02 | NaN | NaN | 2019 | #ff7f0e |
| 20 | ARDEC 2200 | 8/3/2019 | 3 | INFLOW | 2 | A2-03-IN-D | 0 | 9.2 | 0 | 3.00 | 0.210 | 0.0110 | NaN | NaN | 1.05 | NaN | NaN | 2019 | #ff7f0e |
| 21 | ARDEC 2200 | 8/6/2019 | 4 | OUTFLOW | 1 | A2-04-OT | 0 | 8.1 | 0 | 0.00 | 0.160 | 0.0100 | 800.0 | 900.0 | 0.14 | 1.429689e+06 | NaN | 2019 | #1f77b4 |
| 22 | ARDEC 2200 | 8/6/2019 | 4 | OUTFLOW | 2 | A2-04-OT-D | 0 | 8.1 | 0 | 0.00 | 0.150 | 0.0096 | 790.0 | 950.0 | 0.14 | 1.429689e+06 | NaN | 2019 | #1f77b4 |
| 23 | ARDEC 2200 | 8/6/2019 | 4 | INFLOW | 1 | A2-04-IN | 0 | 8.3 | 0 | 2.70 | 0.240 | 0.0099 | 150.0 | 150.0 | 1.08 | NaN | NaN | 2019 | #ff7f0e |
| 24 | ARDEC 2200 | 8/6/2019 | 4 | INFLOW | 2 | A2-04-IN-D | 0 | 8.2 | 0 | 3.10 | 0.420 | 0.0100 | 180.0 | 180.0 | 2.14 | NaN | NaN | 2019 | #ff7f0e |
| 25 | ARDEC 2200 | 8/18/2019 | 5 | OUTFLOW | 1 | A2-05-OT | 0 | NaN | 0 | 1.40 | 0.240 | NaN | NaN | NaN | 0.13 | 3.518484e+05 | NaN | 2019 | #1f77b4 |
| 26 | ARDEC 2200 | 8/18/2019 | 5 | OUTFLOW | 2 | A2-05-OT-D | 0 | NaN | 0 | 1.60 | 0.340 | NaN | NaN | NaN | 0.16 | 3.518484e+05 | NaN | 2019 | #1f77b4 |
| 27 | ARDEC 2200 | 8/22/2019 | 6 | OUTFLOW | 1 | A2-06-OT | 0 | 7.7 | 0 | 0.00 | 0.100 | 0.0099 | NaN | NaN | 0.12 | 9.881645e+05 | NaN | 2019 | #1f77b4 |
| 28 | ARDEC 2200 | 8/22/2019 | 6 | OUTFLOW | 2 | A2-06-OT-D | 0 | 7.7 | 0 | 0.00 | 0.190 | 0.0097 | NaN | NaN | 0.12 | 9.881645e+05 | NaN | 2019 | #1f77b4 |
| 29 | ARDEC 2200 | 8/22/2019 | 6 | GATED PIPE | 1 | A2-06-WL | 0 | 7.7 | 0 | 0.00 | 0.000 | 0.0100 | NaN | NaN | 0.04 | NaN | NaN | 2019 | #2ca02c |
| 30 | ARDEC 2200 | 8/22/2019 | 6 | INFLOW | 1 | A2-06-IN | 0 | 7.7 | 0 | 2.40 | 0.170 | 0.0100 | NaN | NaN | 0.76 | NaN | NaN | 2019 | #ff7f0e |
| 31 | ARDEC 2200 | 8/22/2019 | 6 | INFLOW | 2 | A2-06-IN-D | 0 | 7.7 | 0 | 2.50 | 0.250 | 0.0098 | NaN | NaN | 0.71 | NaN | NaN | 2019 | #ff7f0e |